Expand code
cite_packages(output = "table", pkgs = "Session", out.dir = getwd()) Package Version Citation
1 base 4.4.2 @base
2 patchwork 1.3.0 @patchwork
3 tidyverse 2.0.0 @tidyverse
cite_packages(output = "table", pkgs = "Session", out.dir = getwd()) Package Version Citation
1 base 4.4.2 @base
2 patchwork 1.3.0 @patchwork
3 tidyverse 2.0.0 @tidyverse
The 16S ribosomal RNA data is from several sequencing projects. In chronological order, P15009 contains the groundwater samples from t0 (sampled Oct 2019), while P21015 contains the samples from t24 (sampled Feb 2020) from identical groundwaters. P21015 contains the sequencing libraries from the cultures as part of the inital experiment (one meteoric groundwater), while P26201 contains the libraries from the cultures as part of the follow-up experiment (using three groundwaters as inoculum).
All 16S libraries were processed with the bioinformatic pipeline nf-core/ampliseq (v2.12.0), using the SBDI-GTDB (R09-RS220-1) as a reference.
seqtab <- read_tsv("data/ASV_table.tsv.gz", col_types = cols(.default = col_character(),
count = col_integer()
)) %>%
group_by(sample) %>% mutate(relab = count / sum(count)) %>% ungroup()
tax <- read_tsv("data/ASV_tax.tsv.gz", col_types = cols(.default = col_character()))
smd <- read_tsv("data/smd.tsv", col_types = cols(.default = col_character())) %>%
filter(!sample %in% c("P21015_1082","P21015_1083","P21015_1084","P21015_1087","P21015_1088","P21015_1089")) %>%
filter(fraction != "control")This project includes 15 metagenomes from 15 unique cultures. The metagenomes contain 35 de-replicated metagenome-assembled genomes (MAG) that are affiliated with the Bacillota (7), Desulfobacterota (7), Pseudomonadota (6), Bacteroidota (4), Patescibacteria (3), Nanoarchaeota (2), Acidobacteriota (1), Campylobacterota (1), Chloroflexota (1), Iainarchaeota (1), Spirochaetota (1), and Verrucomicrobiota (1).
coverm <- read_tsv("data/coverm.tsv.gz", col_types = cols(.default = col_character(), tpm = col_double()))
emapper <- read_tsv("data/emapper.tsv.gz", show_col_types = FALSE)
# Sample 35 (l7f) was removed from the binning process as this sample did not produce any bins
bintax <- read_tsv("data/gtdbtk.summary.tsv", col_types = cols(.default = col_character()))
quality <- read_tsv("data/quality_report.tsv", show_col_types = F) %>%
rename_with(tolower) %>%
rename(genome = name) %>%
filter(genome %in% coverm$genome)cols.gw <- c(
"KR0015" = "#8D8680",
"SA1420" = "#A2A475",
"SA2600" = "#C7B19C"
)
cols.origin <- c(
"meteoric" = "#8D8680",
"marine" = "#A2A475",
"saline" = "#C7B19C"
)
cols.fraction <- c(
"environmental" = "#FAEFD1",
"fractionated" = "#A2A475",
"non-fractionated" = "#1B5331"
)
cols.mag <- c(
"Other" = "#899DA4",
"Nanoarchaeota" = "#046C9A",
"Patescibacteria" = "#C7B19C",
"Pseudomonadota" = "#972D15",
"Desulfobacterota" = "#A2A475",
"Bacillota" = "#1B5331"
)
cols.phylum <- c( # Sorted on abundance
"Patescibacteria" = "#C7B19C",
"Desulfobacterota" = "#A2A475",
"Chloroflexota" = "#D69C4E",
"Atribacterota" = "#972D15",
"Acidobacteriota" = "#FAEFD1",
"Omnitrophota" = "#00A08A",
"Nitrospirota" = "#D8B70A",
"Campylobacterota" = "#1B5331",
"Other" = "#899DA4",
"Unidentified" = "#046C9A"
)The 16S data from the groundwater samples are rarefied to check if there is sufficient sequencing depth to estimate microbial diversity.
t0 <- c(
"P15009_1057","P15009_1059","P15009_1060","P21015_1020","P21015_1021","P21015_1022", # KR0015
"P15009_1061","P15009_1065","P15009_1066","P21015_1023","P21015_1024","P21015_1025", # SA1420
"P15009_1069","P15009_1062","P15009_1063","P21015_1029","P21015_1030","P21015_1031" # SA2600
)
# Perform the subsampling
rare <- seqtab %>%
# Remove negative control
filter(sample %in% t0) %>%
select(-relab) %>%
pivot_wider(names_from = "seqid", values_from = "count", values_fill = 0) %>%
column_to_rownames("sample") %>%
vegan::rarecurve(sample = 10000, step = 100, tidy = TRUE) %>%
tibble() %>%
rename_with(tolower)rare %>%
inner_join(smd, by = c("site" = "sample")) %>%
# One sample has ~ 1 million reads
filter(sample < 150000) %>%
ggplot(aes(sample, species, group = site, colour = origin)) +
geom_line(linewidth = 0.75) +
scale_color_manual(values = cols.origin, name = "Groundwater", labels = c("Meteoric", "Marine", "Saline")) +
labs(x = "No. of sequenced reads", y = "Rarefied No. of ASVs") +
scale_x_continuous(labels = c("0", "50.000", "100.000", "150.000")) +
theme_tidy() +
theme(
panel.border = element_rect(fill = "transparent", linewidth = 0.75, colour = "#000000"),
)ggsave(filename = "figures/fig-s1.pdf", width = 100, height = 80, units = "mm")site <- magick::image_read("figures/äspö-mod.png")
p1 <- ggplot() +
annotation_custom(grid::rasterGrob(site)) +
theme_minimal() + coord_fixed() +
theme(
plot.margin = margin(),
aspect.ratio = 0.75
)As the hydrochemistry data is from a monitoring program, the sampling data closest to groundwater sampling was selected. All units are mg l-1, except for EC (mS m-1), pH, T (degrees C), and O18 (ppt SMOW)
chem <- read_tsv("data/sicada.txt", show_col_types = FALSE) %>%
rename_with(tolower) %>%
filter(sampling) %>%
select(groundwater = idcode, so4, feii, ph_lab, ec_lab, temp_w, doc, nh4_n, o18, s34_so4) %>%
inner_join(smd[,c("sample","groundwater")], by = "groundwater")The number of ASVs per sample are estimated with the specnumber() function from the Vegan library.
adiv <- seqtab %>%
select(-relab) %>%
pivot_wider(names_from = "seqid", values_from = "count", values_fill = 0) %>%
column_to_rownames('sample') %>%
vegan::specnumber() %>% as.data.frame() %>%
rownames_to_column('sample') %>% #filter(sample %in% t0) %>%
rename(specnumber = 2)
chem <- adiv %>% # Add the species richness for each sample
inner_join(chem, by = "sample") In order to run the RDA, the absolute counts are standardized using a Hellinger transformation (square root of the relative abundance).
hellinger <- seqtab %>%
filter(sample %in% t0) %>%
# Standard Vegan transformation: Turn table with with samples as *rows*
select(sample, seqid, count) %>%
pivot_wider(names_from = "seqid", values_from = "count", values_fill = 0) %>%
# Turn into a numeric matrix
column_to_rownames('sample') %>% vegan::decostand(method = "hellinger") %>%
rownames_to_column("sample") %>%
pivot_longer(-1, names_to = "seqid", values_to = "hellinger")
m <- hellinger %>% # Create a matrix
spread(seqid, hellinger) %>%
column_to_rownames("sample")The RDA is called while providing a large number of constraining factors, as the RDA function will by default not include redundant variables (Pearson r > 0.9).
set.seed(999)
vegan::rda(
m ~ o18 + doc + ph_lab + ec_lab + temp_w + nh4_n + feii,
correlation = T,
data = chem %>%
semi_join(chem, by = 'sample') %>%
filter(sample %in% t0) %>%
arrange(sample) %>%
column_to_rownames('sample')
) -> rda
Some constraints or conditions were aliased because they were redundant. This
can happen if terms are linearly dependent (collinear): 'ph_lab', 'ec_lab',
'temp_w', 'nh4_n', 'feii'
# This one will contain the proportions explained which we get by dividing the
# eigenvalue of each component with the sum of eigenvalues for all components.
p <- c(rda$CCA$eig, rda$CA$eig)/sum(c(rda$CCA$eig, rda$CA$eig))
# This one is collecting the coordinates for arrows that will depict how the
# factors in our model point in the coordinate
a <- rda$CCA$biplot %>% data.frame() %>% tibble::rownames_to_column('factor')
lab <- tibble(factor = c("o18", "doc"),
label = c("delta^18*O", "DOC")
) %>% inner_join(a, by = "factor")
summary(rda)
Call:
rda(formula = m ~ o18 + doc + ph_lab + ec_lab + temp_w + nh4_n + feii, data = chem %>% semi_join(chem, by = "sample") %>% filter(sample %in% t0) %>% arrange(sample) %>% column_to_rownames("sample"), correlation = T)
Partitioning of variance:
Inertia Proportion
Total 0.6761 1.0000
Constrained 0.3871 0.5725
Unconstrained 0.2890 0.4275
Eigenvalues, and their contribution to the variance
Importance of components:
RDA1 RDA2 PC1 PC2 PC3 PC4 PC5
Eigenvalue 0.2338 0.1533 0.05443 0.04926 0.04333 0.02922 0.02273
Proportion Explained 0.3458 0.2267 0.08051 0.07286 0.06409 0.04322 0.03363
Cumulative Proportion 0.3458 0.5725 0.65305 0.72591 0.79001 0.83323 0.86686
PC6 PC7 PC8 PC9 PC10 PC11 PC12
Eigenvalue 0.01598 0.01338 0.01224 0.0098 0.008638 0.007691 0.007139
Proportion Explained 0.02363 0.01979 0.01810 0.0145 0.012776 0.011376 0.010559
Cumulative Proportion 0.89049 0.91028 0.92837 0.9429 0.955646 0.967022 0.977581
PC13 PC14 PC15
Eigenvalue 0.006254 0.004717 0.004186
Proportion Explained 0.009250 0.006977 0.006192
Cumulative Proportion 0.986831 0.993808 1.000000
Accumulated constrained eigenvalues
Importance of components:
RDA1 RDA2
Eigenvalue 0.2338 0.1533
Proportion Explained 0.6040 0.3960
Cumulative Proportion 0.6040 1.0000
vegan::RsquareAdj(rda)$r.squared
[1] 0.5725442
$adj.r.squared
[1] 0.5155501
p2 <- rda$CCA$wa %>% data.frame() %>% tibble::rownames_to_column('sample') %>%
inner_join(chem, by = 'sample') %>%
ggplot(aes(x = RDA1, y = RDA2)) +
geom_vline(xintercept = 0, colour = "grey", linetype = "dotted") +
geom_hline(yintercept = 0, colour = "grey", linetype = "dotted") +
geom_point(aes(colour = groundwater, size = specnumber), shape = 1, stroke = 0.5) +
xlab(sprintf("RDA1 (%2.1f%% explained)", p[[1]] * 100)) +
ylab(sprintf("RDA2 (%2.1f%% explained)", p[[2]] * 100)) +
geom_segment(
data = a, mapping = aes(xend = RDA1/5, yend = RDA2/5), linewidth = 0.3,
x = 0, y = 0, arrow = arrow(length = unit(1.5, "mm"), type = "closed")
) +
geom_label(
data = lab, mapping = aes(x = RDA1/10, y = RDA2/10, label = label),
size = 6/.pt, parse = TRUE, label.size = 0, label.padding = unit(0.1, "lines")
) +
scale_size(range = c(1,8), name = "No. of ASVs") +
annotate("text", x = -0.07, y =-0.3, label = "Meteoric", colour = cols.gw[1], size = 7/.pt) +
annotate("text", x = 0.06, y = 0.28, label = "Marine", colour = cols.gw[2], size = 7/.pt) +
annotate("text", x = 0.2, y = -0.05, label = "Saline", colour = cols.gw[3], size = 7/.pt) +
stat_ellipse(aes(x = RDA1, y = RDA2, colour = groundwater),
geom = "polygon", show.legend = F,
fill = NA, linetype = "dashed") +
# Add R-squared
annotate("text", x = Inf, y = -Inf,
label = "paste(R ^ 2, \" = 0.52\")",
size = 6/.pt, vjust = -0.5, hjust = 1.05, parse = TRUE) +
scale_color_manual(name = "",values = cols.gw, guide = "none") +
scale_fill_manual(name = "",values = cols.gw, guide = "none") +
theme_tidy(fontsize = 6) +
theme(
aspect.ratio = 1.0,
legend.background = element_rect(fill = NA),
panel.border = element_rect(fill = "transparent", linewidth = 0.75),
legend.text = element_text(margin = margin(r = 12, unit = "pt")),
legend.position = "inside",
legend.position.inside = c(0.9,0.85)
)To visualize the community composition, the most abundant phyla are identified while grouping less abundant phyla as “Other”. ASVs that are not identified on phylum level are included as “Unidentified”.
t <- seqtab %>%
filter(sample %in% t0) %>%
inner_join(tax, by = "seqid") %>%
group_by(phylum, sample) %>%
# Sum the abundance of each phylum within a sample
summarise(relab = sum(relab), .groups = 'drop_last') %>%
# Calculate the mean abundance of each phylum over the categories
summarise(mean_relab = sum(relab), .groups = 'drop') %>%
filter(!is.na(phylum)) %>%
top_n(8, mean_relab)
t %>% arrange(desc(mean_relab)) %>% knitr::kable()| phylum | mean_relab |
|---|---|
| Patescibacteria | 7.1312325 |
| Desulfobacterota | 2.7429408 |
| Chloroflexota | 1.2292858 |
| Atribacterota | 1.2125307 |
| Acidobacteriota | 0.9182271 |
| Omnitrophota | 0.7237157 |
| Nitrospirota | 0.3144247 |
| Campylobacterota | 0.2617040 |
taxref <- tax %>%
left_join(t %>% transmute(phylum, topphylum = phylum), by = "phylum") %>%
mutate(topphylum = if_else(is.na(phylum), "Unidentified", topphylum)) %>%
replace_na(list("topphylum" = "Other"))p3 <- seqtab %>%
filter(sample %in% t0) %>%
inner_join(taxref, by = "seqid") %>%
# Summarize in order to have the sum for each category and topphylum
group_by(sample, topphylum) %>%
summarise(mrelab = sum(relab), .groups = 'drop') %>%
# Call the plot
ggplot(aes(x = fct_relevel(sample, t0),
y = mrelab * 100,
fill = fct_relevel(topphylum, names(cols.phylum) %>% rev())
)
) +
labs(x = '', y = 'Relative abundance (%)', fill = "") +
geom_col() + scale_y_continuous(expand = c(0.01,0), limits = c(-10,101)) +
annotate(geom = "segment", x = 0.55, xend = 3.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 3.55, xend = 6.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 6.55, xend = 9.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 9.55, xend = 12.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 12.55, xend = 15.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 15.55, xend = 18.45, y = -1, yend = -1, linewidth = 0.5) +
# Labels dates
annotate(geom = "text", x = 2, y = -3.5, label = "Oct 2019", size = 6/.pt) +
annotate(geom = "text", x = 5, y = -3.5, label = "April 2020", size = 6/.pt) +
annotate(geom = "text", x = 8, y = -3.5, label = "Oct 2019", size = 6/.pt) +
annotate(geom = "text", x = 11, y = -3.5, label = "April 2020", size = 6/.pt) +
annotate(geom = "text", x = 14, y = -3.5, label = "Oct 2019", size = 6/.pt) +
annotate(geom = "text", x = 17, y = -3.5, label = "April 2020", size = 6/.pt) +
# Labels groundwater
annotate(geom = "text", x = 3.5, y = -8, label = "Meteoric", size = 6/.pt) +
annotate(geom = "text", x = 9.5, y = -8, label = "Marine", size = 6/.pt) +
annotate(geom = "text", x =15.5, y = -8, label = "Saline", size = 6/.pt) +
scale_fill_manual(values = cols.phylum) +
guides(fill = guide_legend(ncol = 3, reverse = TRUE)) +
theme_tidy(legend = "bottom", fontsize = 6) +
theme(
aspect.ratio = 0.75,
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.key.height = unit(3.5, "mm"),
legend.margin = margin(t = -20),
legend.text = element_text(margin = margin(l = 2)),
panel.border = element_rect(fill = "transparent", linewidth = 0.75),
legend.key.spacing.y = unit(0, "mm")
)adiv <- seqtab %>%
select(-relab) %>%
spread(seqid, count, fill = 0) %>%
column_to_rownames('sample') %>%
vegan::diversity() %>% as.data.frame() %>%
rownames_to_column('sample') %>%
rename(shannon = 2) %>%
inner_join(smd, by = "sample")
adiv %>% filter(sample %in% t0) %>%
group_by(groundwater) %>%
summarise(mean = round(mean(shannon), digits = 2), sd = round(sd(shannon), digits = 2), n = n())# A tibble: 3 × 4
groundwater mean sd n
<chr> <dbl> <dbl> <int>
1 KR0015 5.66 0.62 6
2 SA1420 3.27 1.07 6
3 SA2600 3.03 0.44 6
p4 <- adiv %>%
filter(fraction != "control") %>%
mutate(fraction = factor(fraction, levels = c("environmental","fractionated","non-fractionated"))) %>%
ggplot(aes(
x = groundwater,
y = shannon,
fill = fraction
)) +
geom_boxplot( outlier.colour = "white") +
labs(x = "", y = "Shannon's diversity index (H)", fill = "") +
theme_tidy(legend = "inside", fontsize = 6) +
scale_x_discrete(labels = c("Meteoric", "Marine", "Saline")) +
scale_fill_manual(values = cols.fraction, labels = c("Environmental", "< 0.45 µm", "Non-fractionated")) +
theme(
aspect.ratio = 1.0,
panel.border = element_rect(fill = "transparent", linewidth = 0.75),
legend.margin = margin(),
legend.background = element_rect(fill = "NA"),
legend.position.inside = c(0.8,0.88),
legend.text = element_text(margin = margin(l = 1))
)p1 + p2 + p3 + p4 + plot_layout(nrow = 2) + plot_annotation(tag_levels = "a") &
theme(
plot.margin = margin(t = 4, r = .1),
plot.tag.location = "panel",
plot.tag.position = c(-0.07, 1.0),
plot.tag = element_text(size = 8, face = "bold")
)ggsave("figures/fig-1.pdf", width = 180, height = 180, units = "mm")t <- tax %>%
mutate(rank = case_when(
class == "ABY1" ~ "ABY1",
class == "Paceibacteria" ~ "Paceibacteria",
class == "Microgenomatia" ~ "Microgenomatia",
class == "JS1" ~ "JS1",
order == "Syntrophales" ~ "Syntrophales",
order == "Desulfobulbales" ~ "Desulfobulbales",
order == "Anaerolineales" ~ "Anaerolineales",
order == "Thermodesulfovibrionales" ~ "Thermodesulfovibrionales",
order == "Gygaellales" ~ "Gygaellales",
genus == "Sulfuricurvum" ~ "Sulfuricurvum",
.default = NA
)) %>% filter(!is.na(rank))
cols.rank <- c( # Sorted on abundance
"ABY1" = "#C7B19C",
"Paceibacteria" = "#A2A475",
"Microgenomatia" = "#D69C4E",
"JS1" = "#972D15",
"Syntrophales" = "#FAEFD1",
"Desulfobulbales" = "#00A08A",
"Anaerolineales" = "#D8B70A",
"Thermodesulfovibrionales" = "#1B5331",
"Gygaellales" = "#899DA4",
"Sulfuricurvum" = "#046C9A"
)seqtab %>%
filter(sample %in% t0) %>%
inner_join(t, by = "seqid") %>%
group_by(sample, rank) %>%
summarise(mrelab = sum(relab), .groups = 'drop') %>%
# Call the plot
ggplot(aes(x = fct_relevel(sample, t0),
y = mrelab * 100,
fill = fct_relevel(rank, names(cols.rank) %>% rev())
)
) +
labs(x = '', y = 'Relative abundance (%)', fill = "") +
geom_col() +
annotate(geom = "segment", x = 0.55, xend = 3.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 3.55, xend = 6.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 6.55, xend = 9.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 9.55, xend = 12.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 12.55, xend = 15.45, y = -1, yend = -1, linewidth = 0.5) +
annotate(geom = "segment", x = 15.55, xend = 18.45, y = -1, yend = -1, linewidth = 0.5) +
# Labels dates
annotate(geom = "text", x = 2, y = -3.5, label = "Oct 2019", size = 6/.pt) +
annotate(geom = "text", x = 5, y = -3.5, label = "April 2020", size = 6/.pt) +
annotate(geom = "text", x = 8, y = -3.5, label = "Oct 2019", size = 6/.pt) +
annotate(geom = "text", x = 11, y = -3.5, label = "April 2020", size = 6/.pt) +
annotate(geom = "text", x = 14, y = -3.5, label = "Oct 2019", size = 6/.pt) +
annotate(geom = "text", x = 17, y = -3.5, label = "April 2020", size = 6/.pt) +
# Labels groundwater
annotate(geom = "text", x = 3.5, y = -8, label = "Meteoric", size = 6/.pt) +
annotate(geom = "text", x = 9.5, y = -8, label = "Marine", size = 6/.pt) +
annotate(geom = "text", x =15.5, y = -8, label = "Saline", size = 6/.pt) +
scale_fill_manual(values = cols.rank) +
guides(fill = guide_legend(ncol = 3, reverse = TRUE)) +
theme_tidy(legend = "bottom", fontsize = 6) +
theme(
aspect.ratio = 0.75,
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.key.height = unit(3.5, "mm"),
legend.margin = margin(t = -20),
legend.text = element_text(margin = margin(l = 2)),
panel.border = element_rect(fill = "transparent", linewidth = 0.75),
legend.key.spacing.y = unit(0, "mm")
)ggsave(filename = "figures/fig-s2.pdf", width = 120, height = 90, units = "mm")t <- c(
"Spirochaetota", "Acidobacteriota", "Atribacterota", "Chloroflexota",
"Omnitrophota", "Nanoarchaeota", "Campylobacterota", "Pseudomonadota"
)
network.survey <- read_tsv("data/network-survey.tsv", show_col_types = F)
# Correlation between the abundance of the phyla using the cultures
fit <- tibble("phylum" = t, "r.squared" = NA)
for (i in t) {
model <- lm(relab ~ cpr, data = network.survey %>% filter(phylum == i)) %>% summary()
fit[fit$phylum == i, "r.squared"] <- model$adj.r.squared %>% round(digits = 2)
}
fit# A tibble: 8 × 2
phylum r.squared
<chr> <dbl>
1 Spirochaetota 0.57
2 Acidobacteriota 0.55
3 Atribacterota 0.51
4 Chloroflexota 0.14
5 Omnitrophota 0.14
6 Nanoarchaeota 0.08
7 Campylobacterota 0.05
8 Pseudomonadota -0.01
p1 <- network.survey %>%
mutate(phylum = factor(phylum, levels = fit$phylum)) %>%
ggplot(aes(
x = sqrt(cpr * 100),
y = sqrt(relab * 100)
)) +
geom_point(shape = 21, stroke = 0.75, fill = "white") +
geom_label(data = fit %>% mutate(phylum = factor(phylum, levels = phylum)),
aes(x = Inf, y = -Inf, label = paste("R^{2}:", r.squared)),
size = 6/.pt, hjust = 1, vjust = 0, parse = TRUE, label.size = 0, fill = "white") +
geom_smooth(method = "lm", formula = y ~ x, colour = "black", fill = "#D9D9D9", linewidth = 0.5) +
labs(x = "Sqrt relative abundance Patescibacteria (%)", y = "Sqrt relative abundance (%)") +
theme_tidy(fontsize = 6) +
scale_y_continuous(labels = function(x) sprintf("%.1f", x)) +
facet_wrap(~phylum, scales = "free", ncol = 4) +
theme(
strip.background = element_rect(fill = "#F0F0F0")
)t <- c(
"Omnitrophota", "Chloroflexota", "Pseudomonadota", "Actinomycetota",
"Verrucomicrobiota", "Acidobacteriota", "Bacillota", "Spirochaetota"
)
network <- seqtab %>%
inner_join(smd, by = "sample") %>%
filter(fraction == "non-fractionated") %>%
inner_join(tax, by = "seqid") %>% filter(phylum %in% t) %>%
group_by(sample, phylum) %>% summarise(relab = sum(relab), .groups = "drop")
network <- seqtab %>%
inner_join(tax, by = "seqid") %>% filter(phylum == "Patescibacteria") %>%
group_by(sample, phylum) %>% summarise(cpr = sum(relab), .groups = "drop") %>% select(-phylum) %>%
inner_join(network, by = "sample")
# Correlation between the abundance of the phyla using the cultures
fit <- tibble("phylum" = t, "r.squared" = NA)
for (i in t) {
model <- lm(relab ~ cpr, data = network %>% filter(phylum == i)) %>% summary()
fit[fit$phylum == i, "r.squared"] <- model$adj.r.squared %>% round(digits = 2)
}
fit <- fit %>% arrange(desc(r.squared))
fit# A tibble: 8 × 2
phylum r.squared
<chr> <dbl>
1 Omnitrophota 0.81
2 Chloroflexota 0.6
3 Actinomycetota 0.03
4 Pseudomonadota 0.02
5 Bacillota -0.01
6 Verrucomicrobiota -0.02
7 Acidobacteriota -0.02
8 Spirochaetota -0.02
n <- network %>%
group_by(phylum) %>% summarise(no = n(), .groups = "drop") %>%
inner_join(fit, by = "phylum") %>% mutate(no = as.character(no)) %>%
mutate(phylum = factor(phylum, levels = fit$phylum)) %>%
arrange(phylum)
p2 <- network %>%
mutate(phylum = factor(phylum, levels = fit$phylum)) %>%
ggplot(aes(
x = sqrt(cpr * 100),
y = sqrt(relab * 100)
)) +
geom_point(shape = 21, stroke = 0.75, fill = "white") +
geom_label(data = n,
aes(x = Inf, y = -Inf, label = paste("R^{2}: ", r.squared, " (", no, ")", sep = "")),
size = 6/.pt, hjust = 1.0, vjust = -0.0, parse = TRUE, label.size = 0, fill = "white") +
geom_smooth(method = "lm", formula = y ~ x, colour = "black", fill = "#D9D9D9", linewidth = 0.5) +
labs(x = "Sqrt relative abundance Patescibacteria (%)", y = "Sqrt relative abundance (%)") +
theme_tidy(fontsize = 6) +
scale_y_continuous(labels = function(x) sprintf("%.1f", x)) +
facet_wrap(~phylum, scales = "free", ncol = 4) +
theme(
strip.background = element_rect(fill = "#F0F0F0")
)p1 / p2 + plot_annotation(tag_levels = c("a", "b")) & theme(
panel.border = element_rect(fill = "transparent", linewidth = 0.5),
axis.ticks.length = unit(0.5, "mm"),
plot.margin = margin(t = 4, r = 1),
plot.tag = element_text(),
plot.tag.position = c(0.01, 0.96),
strip.background = element_blank(),
strip.text = element_text(margin = margin(b = 2))
)ggsave("figures/fig-6.pdf", width = 160, height = 160, units = "mm")t <- c(
"Spirochaetota", "Acidobacteriota", "Atribacterota", "Chloroflexota",
"Omnitrophota", "Nanoarchaeota", "Campylobacterota", "Pseudomonadota"
)
# The file survey-amplicons.tsv contains the ASVs from Äspö groundwaters published in https://doi.org/10.3389/fmicb.2018.02880
survey.amplicons <- read_tsv("data/survey-amplicons.tsv.gz", show_col_types = FALSE) %>%
group_by(sample) %>% mutate(relab = count / sum(count)) %>% ungroup()
cpr <- survey.amplicons %>%
filter(phylum == "Patescibacteria") %>%
group_by(sample, order) %>% summarise(cpr = sum(relab), .groups = "drop")
network.survey <- survey.amplicons %>%
filter(phylum %in% t) %>%
group_by(sample, order) %>% summarise(relab = sum(relab), .groups = "drop") %>%
# Add colum with abundance of CPR
left_join(cpr, by = "sample", relationship = "many-to-many") %>%
rename(order.clade = order.x, order.cpr = order.y)
# The filter > 19 implies that the two orders have to co-occur at least ten times
fit <- network.survey %>%
group_by(order.clade, order.cpr) %>% summarize(n = n(), .groups = "drop_last") %>%
filter(n > 19) %>% ungroup() %>% na.omit()
for (i in 1:nrow(fit)) {
data = network.survey %>% filter(order.clade == fit[i,]$order.clade & order.cpr == fit[i,]$order.cpr)
model <- lm(relab ~ cpr, data = data)
fit[i, "r.squared"] <- summary(model)$r.squared
}
fit <- fit %>% slice_max(r.squared, n = 8) %>%
mutate(clade = paste(order.clade, order.cpr)) %>%
mutate(r.squared = signif(.$r.squared, digits = 2)) %>%
arrange(desc(r.squared)) %>%
mutate(n = as.character(n))label <- c(
"Limnocylindrales UBA9983" = "Limnocylindrales\n-\nUBA9983",
"Limnocylindrales UBA12405" = "Limnocylindrales\n-\nUBA12405",
"Limnocylindrales JAHJFT01" = "Limnocylindrales\n-\nJAHJFT01",
"SCGC-AAA011-G17 UBA2169" = "SCGC-AAA011-G17\n-\nUBA2169",
"Limnocylindrales JAHISW01" = "Limnocylindrales\n-\nJAHISW01",
"UBA2200 JAHJFT01" = "UBA2200\n-\nJAHJFT01",
"Limnocylindrales UBA1369" = "Limnocylindrales\n-\nUBA1369",
"Limnocylindrales Paceibacterales" = "Limnocylindrales\n-\nPaceibacterales"
)
p1 <- network.survey %>%
mutate(clade = paste(order.clade, order.cpr)) %>%
filter(clade %in% fit$clade) %>%
mutate(clade = factor(clade, levels = fit$clade)) %>%
ggplot(aes(
x = sqrt(cpr * 100),
y = sqrt(relab * 100)
)) +
geom_point(shape = 21, stroke = 0.75, fill = "white") +
geom_label(data = fit %>% mutate(clade = factor(clade, levels = clade)),
aes(x = Inf, y = -Inf, label = paste("R^{2}: ", r.squared, " (", n, ")", sep = "")),
size = 6/.pt, hjust = 1.0, vjust = -0.0, parse = TRUE, label.size = 0, fill = "white") +
geom_smooth(method = "lm", formula = y ~ x, colour = "black", fill = "#D9D9D9", linewidth = 0.5) +
labs(x = "Sqrt relative abundance Patescibacteria (%)", y = "Sqrt relative abundance (%)") +
theme_tidy(fontsize = 6) +
scale_y_continuous(labels = function(x) sprintf("%.1f", x)) +
facet_wrap(~clade, scales = "free", ncol = 4, labeller = as_labeller(label)) +
theme(
strip.background = element_rect(fill = "#F0F0F0")
)t <- c(
"Omnitrophota", "Chloroflexota", "Pseudomonadota", "Actinobacteriota",
"Verrucomicrobiota", "Acidobacteriota", "Bacillota", "Spirochaetota"
)
cpr <- seqtab %>%
inner_join(smd, by = "sample") %>%
filter(fraction == "non-fractionated") %>%
inner_join(tax, by = "seqid") %>% filter(phylum == "Patescibacteria") %>%
group_by(sample, order) %>% summarise(cpr = sum(relab), .groups = "drop")
network <- seqtab %>%
inner_join(smd, by = "sample") %>%
filter(fraction == "non-fractionated") %>%
inner_join(tax, by = "seqid") %>% filter(phylum %in% t) %>%
group_by(sample, order) %>% summarise(relab = sum(relab), .groups = "drop") %>%
# Add colum with abundance of CPR
left_join(cpr, by = "sample", relationship = "many-to-many") %>%
rename(order.clade = order.x, order.cpr = order.y)
# The filter > 19 implies that the two orders have to co-occur at least ten times
fit <- network %>%
group_by(order.clade, order.cpr) %>% summarize(n = n(), .groups = "drop_last") %>%
filter(n > 19) %>% ungroup() %>% na.omit()
for (i in 1:nrow(fit)) {
data = network %>% filter(order.clade == fit[i,]$order.clade & order.cpr == fit[i,]$order.cpr)
model <- lm(relab ~ cpr, data = data)
fit[i, "r.squared"] <- summary(model)$adj.r.squared
}
fit <- fit %>% slice_max(r.squared, n = 8) %>%
mutate(clade = paste(order.clade, order.cpr)) %>%
mutate(r.squared = signif(.$r.squared, digits = 2)) %>%
arrange(desc(r.squared)) %>%
mutate(n = as.character(n))label = c(
"2-01-FULL-45-10 BM507" = "2-01-FULL-45-10\n-\nBM507",
"Gygaellales BM507" = "Gygaellales\n-\nBM507",
"Gorgyraeales BM507" = "Gorgyraeales\n-\nBM507",
"Pluralincolimonadales Portnoybacterales" = "Pluralincolimonadales\n-\nPortnoybacterales",
"Gygaellales Magasanikbacterales" = "Gygaellales\n-\nMagasanikbacterales",
"Pluralincolimonadales BM507" = "Pluralincolimonadales\n-\nBM507",
"Gygaellales Portnoybacterales" = "Gygaellales\n-\nPortnoybacterales",
"Gygaellales SG8-24" = "Gygaellales\n-\nSG8-24"
)
p2 <- network %>%
mutate(clade = paste(order.clade, order.cpr)) %>%
filter(clade %in% fit$clade) %>%
mutate(clade = factor(clade, levels = fit$clade)) %>%
ggplot(aes(
x = sqrt(cpr * 100),
y = sqrt(relab * 100)
)) +
geom_point(shape = 21, stroke = 0.75, fill = "white") +
geom_label(data = fit %>% mutate(clade = factor(clade, levels = clade)),
aes(x = Inf, y = -Inf, label = paste("R^{2}: ", r.squared, " (", n, ")", sep = "")),
size = 6/.pt, hjust = 1.0, vjust = -0.0, parse = TRUE, label.size = 0, fill = "white") +
geom_smooth(method = "lm", formula = y ~ x, colour = "black", fill = "#D9D9D9", linewidth = 0.5) +
labs(x = "", y = "Sqrt relative abundance (%)") +
theme_tidy(fontsize = 6) +
scale_y_continuous(labels = function(x) sprintf("%.1f", x)) +
facet_wrap(~clade, scales = "free", ncol = 4, labeller = as_labeller(label)) +
theme(
strip.background = element_rect(fill = "#F0F0F0")
)p1 / p2 +
plot_annotation(tag_levels = c("a", "b")) & theme(
panel.border = element_rect(fill = "transparent", linewidth = 0.5),
axis.ticks.length = unit(0.5, "mm"),
plot.margin = margin(b = 4, r = 1),
plot.tag.position = c(0.01, 0.95)
)ggsave("figures/fig-s4.pdf", width = 140, height = 160, units = "mm")set.seed(999)
nmds <- seqtab %>% # Full size fraction
inner_join(smd, by = "sample") %>%
filter(!is.na(age) | sample %in% t0 & groundwater == "KR0015") %>%
group_by(seqid) %>% filter(count > 2) %>% ungroup() %>%
select(seqid, sample, count) %>% spread(seqid, count, fill = 0) %>%
column_to_rownames("sample") %>% vegan::metaMDS()Square root transformation
Wisconsin double standardization
Run 0 stress 0.1204323
Run 1 stress 0.1401622
Run 2 stress 0.1321109
Run 3 stress 0.1410864
Run 4 stress 0.1233841
Run 5 stress 0.1285199
Run 6 stress 0.1257905
Run 7 stress 0.1374948
Run 8 stress 0.1369102
Run 9 stress 0.1250981
Run 10 stress 0.1155069
... New best solution
... Procrustes: rmse 0.09176254 max resid 0.4493124
Run 11 stress 0.1392887
Run 12 stress 0.1288962
Run 13 stress 0.1367449
Run 14 stress 0.1391916
Run 15 stress 0.1353133
Run 16 stress 0.1186726
Run 17 stress 0.1170498
Run 18 stress 0.1260413
Run 19 stress 0.1288964
Run 20 stress 0.1185728
*** Best solution was not repeated -- monoMDS stopping criteria:
15: stress ratio > sratmax
5: scale factor of the gradient < sfgrmin
vegan::scores(nmds)$sites %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
inner_join(smd, by = "sample") -> nmds.scoresi <- data.frame(x0 = -1,52, y0 = 0.5, 0.5)
p1 <- nmds.scores %>%
mutate(medium = factor(medium, levels = c("ace","lys","env"))) %>%
ggplot(aes(x = NMDS1, y = NMDS2,
shape = medium,
fill = fraction, color = fraction
)
) +
geom_vline(xintercept = 0, linetype = 'dotted') +
geom_hline(yintercept = 0, linetype = 'dotted') +
geom_point(size = 4.0, stroke = 0.75) +
geom_text(data = nmds.scores,
aes(label = age),
size = 6/.pt, color = "black", na.rm = TRUE) +
scale_shape_manual(values = c("ace" = 23, "lys" = 24, "env" = 21),
labels = c("Acetate", "Lysate", "Environmental")) +
scale_color_manual(values = cols.fraction, labels = c("Environmental", "< 0.45 µm", "Non-fractionated")) +
scale_fill_manual(values = alpha(cols.fraction, alpha = 0.2), guide = "none") +
annotate('text', x = -Inf, y = -Inf, size = 6/.pt, hjust = -0.1, vjust = -0.5,
label = paste('Stress = ', round(nmds$stress, digits = 2))) +
theme_tidy(legend = "bottom", fontsize = 6) +
ggforce::geom_circle(aes(x0 = -1.54, y0 = 0.52, r = 0.45),
data = i,
inherit.aes = FALSE,
fill = NA, color = "black", linewidth = 0.3, linetype = "dashed") +
theme(
aspect.ratio = 1.0,
legend.title = element_blank(),
legend.box = "vertical",
legend.spacing.x = unit(0, "mm"),
legend.box.margin = margin(),
legend.margin = margin(),
legend.text = element_text(margin = margin(l = 2, r = 8, unit = "pt")),
legend.key.spacing.x = unit(0, "mm")
)t <- seqtab %>%
filter(sample %in% paste("P21015", seq(1042,1081), sep = "_")
) %>%
inner_join(tax, by = "seqid") %>%
group_by(phylum, sample) %>%
# Sum the abundance of each phylum within a sample
summarise(relab = sum(relab), .groups = 'drop_last') %>%
# Calculate the mean abundance of each phylum over the categories
summarise(mean_relab = sum(relab), .groups = 'drop') %>%
filter(!is.na(phylum)) %>%
top_n(9, mean_relab) %>% filter(phylum != "Bacteroidota")
# The most abundant phyla, averaged over all cultures
t %>% arrange(desc(mean_relab))# A tibble: 8 × 2
phylum mean_relab
<chr> <dbl>
1 Pseudomonadota 10.8
2 Acidobacteriota 7.90
3 Spirochaetota 6.28
4 Patescibacteria 6.16
5 Desulfobacterota 3.22
6 Bacillota 1.54
7 Nanoarchaeota 0.967
8 Omnitrophota 0.853
taxref <- tax %>%
left_join(t %>% transmute(phylum, topphylum = phylum), by = "phylum") %>%
replace_na(list("topphylum" = "Other"))
cols.phylum <- c(
"Pseudomonadota" = "#972D15",
"Acidobacteriota" = "#FAEFD1",
"Spirochaetota" = "#D8B70A",
"Patescibacteria" = "#C7B19C",
"Desulfobacterota" = "#A2A475",
"Bacillota" = "#1B5331",
"Nanoarchaeota" = "#046C9A",
"Omnitrophota" = "#00A08A",
"Other" = "#899DA4"
)p2 <- seqtab %>%
filter(sample %in% paste("P21015", seq(1042,1081), sep = "_")
) %>%
inner_join(taxref, by = "seqid") %>%
# Summarize in order to have the sum for each category and topphylum
group_by(sample, topphylum) %>%
summarise(relab = sum(relab), .groups = 'drop') %>%
inner_join(smd, by = "sample") %>%
mutate(age = as.numeric(.$age)) %>%
mutate(fraction = case_when(
fraction == "non-fractionated" & medium == "ace" ~ "Acetate, non-fractionated",
fraction == "non-fractionated" & medium == "lys" ~ "Lysate, non-fractionated",
fraction == "fractionated" & medium == "ace" ~ "Acetate, < 0.45 µm fraction",
fraction == "fractionated" & medium == "lys" ~ "Lysate, < 0.45 µm fraction"
)) %>%
mutate(topphylum = factor(topphylum, levels = rev(names(cols.phylum)))) %>%
# Call the plot
ggplot(aes(
age,
relab * 100,
fill = topphylum)) +
geom_col() +
scale_x_continuous(n.breaks = 10) +
facet_wrap(~ fraction, nrow = 2) +
labs(x = 'Week', y = 'Relative abundance (%)', fill = '') +
scale_fill_manual(values = cols.phylum) +
guides(fill = guide_legend(reverse = TRUE, nrow = 3)) +
theme_tidy(legend = "bottom", fontsize = 6) +
theme(
aspect.ratio = 1.0,
strip.background = element_blank(),
legend.background = element_blank(),
legend.key.height = unit(4, "mm"), legend.key.width = unit(3, "mm"),
legend.text = element_text(margin = margin(l = 2)),
legend.key.spacing.y = unit(0, "mm"),
axis.ticks.x = element_blank(), axis.text.x = element_text(vjust = 2),
legend.box.margin = margin(t = -5)
)p1 + p2 + plot_annotation(tag_levels = "a") & theme(
plot.tag.location = "plot",
plot.tag.position = c(0.05,0.98),
plot.margin = margin(r = 2),
legend.box.margin = margin(t = -10)
)ggsave("figures/fig-3.pdf", height = 11, width = 18, units = "cm")set.seed(999)
nmds <- seqtab %>% # Full size fraction
inner_join(smd, by = "sample") %>%
filter(is.na(age) & groundwater == "KR0015") %>%
group_by(seqid) %>% filter(count > 2) %>% ungroup() %>%
select(seqid, sample, count) %>% spread(seqid, count, fill = 0) %>%
column_to_rownames("sample") %>% vegan::metaMDS()Square root transformation
Wisconsin double standardization
Run 0 stress 0.1162664
Run 1 stress 0.1113097
... New best solution
... Procrustes: rmse 0.04054519 max resid 0.1910308
Run 2 stress 0.1113097
... New best solution
... Procrustes: rmse 7.963381e-06 max resid 2.967843e-05
... Similar to previous best
Run 3 stress 0.1113097
... Procrustes: rmse 0.0001018374 max resid 0.000389594
... Similar to previous best
Run 4 stress 0.1113097
... Procrustes: rmse 5.762553e-05 max resid 0.0002199511
... Similar to previous best
Run 5 stress 0.1162664
Run 6 stress 0.1113097
... Procrustes: rmse 3.109983e-05 max resid 0.0001187684
... Similar to previous best
Run 7 stress 0.1113099
... Procrustes: rmse 0.0002556252 max resid 0.0009758969
... Similar to previous best
Run 8 stress 0.1162664
Run 9 stress 0.1113097
... Procrustes: rmse 0.0001426061 max resid 0.0005440469
... Similar to previous best
Run 10 stress 0.1162664
Run 11 stress 0.1113097
... Procrustes: rmse 4.341062e-05 max resid 0.0001600677
... Similar to previous best
Run 12 stress 0.1162664
Run 13 stress 0.1162664
Run 14 stress 0.1527016
Run 15 stress 0.1113097
... Procrustes: rmse 4.266142e-05 max resid 0.0001623409
... Similar to previous best
Run 16 stress 0.1527132
Run 17 stress 0.1162664
Run 18 stress 0.1113097
... Procrustes: rmse 3.160745e-05 max resid 0.0001207792
... Similar to previous best
Run 19 stress 0.1527016
Run 20 stress 0.1829632
*** Best solution repeated 9 times
vegan::scores(nmds)$sites %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
inner_join(smd, by = "sample") -> nmds.scoresPlot the ordination
p1 <- nmds.scores %>%
mutate(medium = factor(medium, levels = c("ace","lys","env"))) %>%
ggplot(aes(x = NMDS1, y = NMDS2,
shape = medium,
fill = fraction, color = fraction
)
) +
geom_vline(xintercept = 0, linetype = 'dotted') +
geom_hline(yintercept = 0, linetype = 'dotted') +
geom_point(size = 4.0, stroke = 0.75) +
geom_text(data = nmds.scores,
aes(label = age),
size = 6/.pt, color = "black", na.rm = TRUE) +
scale_shape_manual(values = c("ace" = 23, "lys" = 24, "env" = 21),
labels = c("Acetate", "Lysate", "Environmental")) +
scale_color_manual(values = cols.fraction, labels = c("Environmental", "< 0.45 µm", "Non-fractionated")) +
scale_fill_manual(values = alpha(cols.fraction, alpha = 0.2), guide = "none") +
annotate('text', x = -Inf, y = -Inf, size = 6/.pt, hjust = -0.1, vjust = -0.5,
label = paste('Stress = ', round(nmds$stress, digits = 2))) +
theme_tidy(legend = "bottom", fontsize = 6) +
theme(
#axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.title = element_blank(),
legend.box = "vertical",
legend.spacing.x = unit(0, "mm"),
legend.box.margin = margin(),
legend.margin = margin(),
legend.text = element_text(margin = margin(l = 2, r = 8, unit = "pt")),
legend.key.spacing.x = unit(0, "mm")
)set.seed(999)
nmds <- seqtab %>% # Full size fraction
inner_join(smd, by = "sample") %>%
filter(is.na(age) & groundwater == "SA1420") %>%
group_by(seqid) %>% filter(count > 2) %>% ungroup() %>%
select(seqid, sample, count) %>% spread(seqid, count, fill = 0) %>%
column_to_rownames("sample") %>% vegan::metaMDS()Square root transformation
Wisconsin double standardization
Run 0 stress 0.1463847
Run 1 stress 0.1728024
Run 2 stress 0.1827598
Run 3 stress 0.1463848
... Procrustes: rmse 0.0001759352 max resid 0.0004703476
... Similar to previous best
Run 4 stress 0.1463848
... Procrustes: rmse 0.0001427329 max resid 0.0004618032
... Similar to previous best
Run 5 stress 0.1728024
Run 6 stress 0.1711521
Run 7 stress 0.1858381
Run 8 stress 0.1463847
... Procrustes: rmse 6.710846e-05 max resid 0.0002414644
... Similar to previous best
Run 9 stress 0.1728024
Run 10 stress 0.1463848
... Procrustes: rmse 0.0001870214 max resid 0.0007340409
... Similar to previous best
Run 11 stress 0.1463846
... New best solution
... Procrustes: rmse 0.0001077695 max resid 0.0004026575
... Similar to previous best
Run 12 stress 0.1976911
Run 13 stress 0.1825336
Run 14 stress 0.1463846
... Procrustes: rmse 8.697484e-05 max resid 0.0003340923
... Similar to previous best
Run 15 stress 0.1737588
Run 16 stress 0.1867263
Run 17 stress 0.1753243
Run 18 stress 0.1463847
... Procrustes: rmse 0.0001946414 max resid 0.0007199926
... Similar to previous best
Run 19 stress 0.1463847
... Procrustes: rmse 0.000187155 max resid 0.0006815765
... Similar to previous best
Run 20 stress 0.1728024
*** Best solution repeated 4 times
vegan::scores(nmds)$sites %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
inner_join(smd, by = "sample") -> nmds.scoresp2 <- nmds.scores %>%
mutate(medium = factor(medium, levels = c("ace","lys","env"))) %>%
ggplot(aes(x = NMDS1, y = NMDS2,
shape = medium,
fill = fraction, color = fraction
)
) +
geom_vline(xintercept = 0, linetype = 'dotted') +
geom_hline(yintercept = 0, linetype = 'dotted') +
geom_point(size = 4.0, stroke = 0.75) +
geom_text(data = nmds.scores,
aes(label = age),
size = 6/.pt, color = "black", na.rm = TRUE) +
scale_shape_manual(values = c("ace" = 23, "lys" = 24, "env" = 21),
labels = c("Acetate", "Lysate", "Environmental")) +
scale_color_manual(values = cols.fraction, labels = c("Environmental", "< 0.45 µm", "Non-fractionated")) +
scale_fill_manual(values = alpha(cols.fraction, alpha = 0.2), guide = "none") +
annotate('text', x = -Inf, y = -Inf, size = 6/.pt, hjust = -0.1, vjust = -0.5,
label = paste('Stress = ', round(nmds$stress, digits = 2))) +
theme_tidy(legend = "bottom", fontsize = 6) +
theme(
axis.title.y = element_blank(),
legend.title = element_blank(),
legend.box = "vertical",
legend.spacing.x = unit(0, "mm"),
legend.box.margin = margin(),
legend.margin = margin(),
legend.text = element_text(margin = margin(l = 2, r = 8, unit = "pt")),
legend.key.spacing.x = unit(0, "mm")
)set.seed(999)
nmds <- seqtab %>% # Full size fraction
inner_join(smd, by = "sample") %>%
filter(is.na(age) & groundwater == "SA2600") %>%
group_by(seqid) %>% filter(count > 2) %>% ungroup() %>%
select(seqid, sample, count) %>% spread(seqid, count, fill = 0) %>%
column_to_rownames("sample") %>% vegan::metaMDS()Square root transformation
Wisconsin double standardization
Run 0 stress 0.07165588
Run 1 stress 0.07165587
... New best solution
... Procrustes: rmse 6.783627e-05 max resid 0.0002209466
... Similar to previous best
Run 2 stress 0.1058458
Run 3 stress 0.07540527
Run 4 stress 0.07540524
Run 5 stress 0.1058377
Run 6 stress 0.1058456
Run 7 stress 0.07165587
... New best solution
... Procrustes: rmse 6.929929e-05 max resid 0.00022789
... Similar to previous best
Run 8 stress 0.113827
Run 9 stress 0.1289874
Run 10 stress 0.1058384
Run 11 stress 0.2466964
Run 12 stress 0.1058454
Run 13 stress 0.07165587
... Procrustes: rmse 7.472214e-05 max resid 0.0002464097
... Similar to previous best
Run 14 stress 0.07165587
... New best solution
... Procrustes: rmse 6.164708e-05 max resid 0.0002032928
... Similar to previous best
Run 15 stress 0.1058389
Run 16 stress 0.113827
Run 17 stress 0.07165591
... Procrustes: rmse 5.919096e-05 max resid 0.0001889389
... Similar to previous best
Run 18 stress 0.07165592
... Procrustes: rmse 8.455526e-05 max resid 0.0002765345
... Similar to previous best
Run 19 stress 0.105838
Run 20 stress 0.1226938
*** Best solution repeated 3 times
Warning in postMDS(out$points, dis, plot = max(0, plot - 1), ...): skipping
half-change scaling: too few points below threshold
vegan::scores(nmds)$sites %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
inner_join(smd, by = "sample") -> nmds.scoresp3 <- nmds.scores %>%
mutate(medium = factor(medium, levels = c("ace","lys","env"))) %>%
ggplot(aes(x = NMDS1, y = NMDS2,
shape = medium,
fill = fraction, color = fraction
)
) +
geom_vline(xintercept = 0, linetype = 'dotted') +
geom_hline(yintercept = 0, linetype = 'dotted') +
geom_point(size = 4.0, stroke = 0.75) +
geom_text(data = nmds.scores,
aes(label = age),
size = 6/.pt, color = "black", na.rm = TRUE) +
scale_shape_manual(values = c("ace" = 23, "lys" = 24, "env" = 21),
labels = c("Acetate", "Lysate", "Environmental"), guide = "none") +
scale_color_manual(values = cols.fraction, labels = c("Environmental", "< 0.45 µm", "Non-fractionated"), guide = "none") +
scale_fill_manual(values = alpha(cols.fraction, alpha = 0.2), guide = "none") +
annotate('text', x = -Inf, y = -Inf, size = 6/.pt, hjust = -0.1, vjust = -0.5,
label = paste('Stress = ', round(nmds$stress, digits = 2))) +
theme_tidy(legend = "bottom", fontsize = 6) +
theme(
axis.title.y = element_blank()
)p1 + p2 + p3 + plot_layout(nrow = 1, guides = "collect") + plot_annotation(tag_levels = "a") & theme(
legend.position = "bottom",
aspect.ratio = 1.0,
plot.tag.location = "panel",
plot.tag.position = c(0.04, 0.97),
plot.margin = margin(t = 3, r = 3),
panel.border = element_rect(fill = "transparent", linewidth = 0.75)
)ggsave("figures/fig-s2.pdf", height = 90, width = 180, units = "mm")qpcr <- read_tsv('data/qpcr.tsv', col_types = cols(.default = col_character(),
quantity = col_double(),
age = col_factor(levels = seq(1:10) %>% as.character())
)
) %>% mutate(medium = gsub("ace","srm", medium)) %>%
rename(kingdom = Kingdom)p1 <- qpcr %>%
group_by(sample, kingdom) %>%
summarise(mean = mean(quantity), sd = sd(quantity), .groups = 'drop') %>%
inner_join(smd, by = "sample") %>%
mutate(category = paste(medium, fraction)) %>%
# Call plot
ggplot(aes(x = fct_relevel(age, seq(1:10) %>% as.character),
y = mean,
group = category,
fill = fraction
)
) +
geom_line(aes(linetype = medium)) +
facet_wrap(~ factor(kingdom, levels = c("Bacteria", "Archaea"))) +
geom_pointrange(aes(ymin = mean - sd,
ymax = mean + sd),
shape = 21, stroke = 0.5) +
labs(x = 'Week', y = expression("Gene copy number (mL"^"-1"*")")) +
scale_y_log10(breaks = c(1e3,1e4,1e5,1e6,1e7),
labels = c(expression("10"^"3"),
expression("10"^"4"),
expression("10"^"5"),
expression("10"^"6"),
expression("10"^"7")
)
) +
scale_linetype(name = "Medium:", labels = c("Acetate", "Cell lysate")) +
scale_fill_manual(values = cols.fraction,
labels = c("< 0.45 µm fraction", "Non-fractionated"),
name = "") +
theme_tidy(legend = "right") +
theme(
plot.margin = margin(),
legend.text = element_text(margin = margin(l = 0)),
axis.ticks.length = unit(0.5, "mm"),
panel.border = element_rect(fill = "transparent", linewidth = 0.75),
legend.box.margin = margin(-8),
)growth <- read_tsv("data/micro_cultures.tsv", col_types = cols(.default = col_character(),
count = col_integer(),
date = col_date(format = "%d-%m-%Y"))
) %>%
mutate(week = case_when(
date == "2021-10-15" ~ 1,
date == "2021-11-11" ~ 5,
date == "2021-11-25" ~ 7,
date == "2021-12-09" ~ 9,
date == "2021-12-30" ~ 12,
date == "2022-01-18" ~ 15,
date == "2022-02-03" ~ 17
)) %>%
filter(!sample %in% c("P26201_1051","P26201_1024")) %>%
mutate(label = case_when(
groundwater == "KR0015" ~ "Meteoric",
groundwater == "SA1420" ~ "Marine",
groundwater == "SA2600" ~ "Saline"
))
counts <- tibble(
label = c("Meteoric","Marine","Saline"),
min = c(680220, 59220, 14100),
max = c(890910, 73910, 23000)
)# Microscopy counts on KR0015 groundwater in duplicates, obtaining 680,220 and 890,910 cells ml-1.
# Microscopy counts on SA1420 groundwater in duplicates, obtaining 59220 and 73910 cells ml-1.
# For SA2600, this number was 14,100 and 23,000 cells ml-1
p2 <- growth %>%
ggplot(aes(x = week, y = count, fill = fraction)) +
geom_rect(data = counts, aes(xmin = 1, xmax = 17, ymin = min, ymax = max), fill = "#8D8680", alpha = 0.5, inherit.aes = FALSE) +
geom_line(aes(linetype = medium)) +
geom_point(aes(fill = fraction), shape = 21, stroke = 0.5, size = 2) +
scale_y_log10(limits = c(1e4,1e8),
breaks = c(1e4,1e5,1e6,1e7),
labels = c(expression("10"^"4"),
expression("10"^"5"),
expression("10"^"6"),
expression("10"^"7"))
) +
scale_x_continuous(breaks = c(1,5,7,9,12,15,17),
labels = c("1", "5", "", "9", "12", "", "17")) +
scale_fill_manual(values = cols.fraction, guide = "none") +
scale_linetype_manual(values = c("srm" = 1, "lys" = 2), name = "Medium:",
labels = c("Acetate", "Cell lysate"),
guide = "none") +
facet_wrap(~ factor(label, levels = c("Meteoric", "Marine", "Saline")), ncol = 3) +
labs(x = "Week", y = expression("Cell number (mL"^"-1"*")"), colour = "Fraction:") +
theme_tidy(legend = "bottom") +
theme(
legend.margin = margin(t = -10, l = 20),
plot.margin = margin(),
panel.border = element_rect(fill = "transparent", linewidth = 0.5)
)p1 + p2 + plot_layout(nrow = 2, heights = c(3,2), guides = "collect") + plot_annotation(tag_levels = "a") & theme(
aspect.ratio = 1.0,
plot.margin = margin(b = 6),
legend.position = "bottom",
plot.tag.location = "plot",
plot.tag.position = c(0.04, 0.96),
legend.box.margin = margin(t = -8)
)ggsave(filename = "figures/fig-2.pdf", width = 14, height = 14, units = "cm")t <- seqtab %>%
inner_join(smd, by = "sample") %>% filter(groundwater == "KR0015") %>%
filter(grepl("P26", sample)) %>%
inner_join(tax, by = "seqid") %>%
group_by(phylum, sample) %>%
# Sum the abundance of each phylum within a sample
summarise(relab = sum(relab), .groups = 'drop_last') %>%
# Calculate the mean abundance of each phylum over the categories
summarise(mean_relab = sum(relab), .groups = 'drop') %>%
top_n(8, mean_relab) %>% arrange(desc(mean_relab)) %>% pull(phylum)cols.phylum <- c(
"Pseudomonadota" = "#972D15",
"Bacillota" = "#1B5331",
"Bacteroidota" = "#D3D4D8",
"Desulfobacterota" = "#A2A475",
"Patescibacteria" = "#C7B19C",
"Omnitrophota" = "#00A08A",
"Spirochaetota" = "#D8B70A",
"Campylobacterota" = "#79402E",
"Acidobacteriota" = "#FAEFD1",
"Nanoarchaeota" = "#046C9A",
"Actinomycetota" = "#C6CDF7",
"Verrucomicrobiota" = "#D69C4E",
"Other" = "#899DA4"
)p1 <- seqtab %>%
filter(grepl("P26", sample)) %>%
inner_join(tax, by = "seqid") %>%
mutate(phylum = if_else(phylum %in% t, phylum, "Other")) %>%
# Summarize in order to have the sum for each category and topphylum
group_by(sample, phylum) %>%
summarise(relab = sum(relab), .groups = 'drop') %>%
inner_join(smd, by = "sample") %>% filter(groundwater == "KR0015") %>%
mutate(phylum = factor(phylum, levels = names(rev(cols.phylum)))) %>%
mutate(fraction = case_when(
fraction == "fractionated" & medium == "lys" ~ "Fractionated: lysate",
fraction == "fractionated" & medium == "ace" ~ "Fractionated: acetate",
fraction == "non-fractionated" & medium == "lys" ~ "Non-fractionated: lysate",
fraction == "non-fractionated" & medium == "ace" ~ "Non-fractionated: acetate",
)) %>%
# Call the plot
ggplot(aes(
sample,
relab * 100,
fill = phylum)) +
geom_col(show.legend = TRUE) +
facet_wrap(~ fraction, scales = "free_x", nrow = 1) +
labs(x = "", y = 'Relative abundance (%)', fill = '') +
scale_fill_manual(values = cols.phylum, drop = FALSE) +
theme_tidy(legend = "bottom") + guides(fill = guide_legend(nrow = 3, reverse = TRUE)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.key.width = unit(3.5, "mm"),
legend.text = element_text(margin = margin(l = 2)),
legend.margin = margin(t = -10)
)t <- seqtab %>%
inner_join(smd, by = "sample") %>% filter(groundwater == "SA1420") %>%
filter(grepl("P26", sample)) %>%
inner_join(tax, by = "seqid") %>%
group_by(phylum, sample) %>%
# Sum the abundance of each phylum within a sample
summarise(relab = sum(relab), .groups = 'drop_last') %>%
# Calculate the mean abundance of each phylum over the categories
summarise(mean_relab = sum(relab), .groups = 'drop') %>%
top_n(8, mean_relab) %>% arrange(desc(mean_relab)) %>% pull(phylum)p2 <- seqtab %>%
filter(grepl("P26", sample)) %>%
inner_join(tax, by = "seqid") %>%
mutate(phylum = if_else(phylum %in% t, phylum, "Other")) %>%
# Summarize in order to have the sum for each category and topphylum
group_by(sample, phylum) %>%
summarise(relab = sum(relab), .groups = 'drop') %>%
inner_join(smd, by = "sample") %>% filter(groundwater == "SA1420") %>%
mutate(phylum = factor(phylum, levels = names(rev(cols.phylum)))) %>%
mutate(fraction = case_when(
fraction == "fractionated" & medium == "lys" ~ "Fractionated: lysate",
fraction == "fractionated" & medium == "ace" ~ "Fractionated: acetate",
fraction == "non-fractionated" & medium == "lys" ~ "Non-fractionated: lysate",
fraction == "non-fractionated" & medium == "ace" ~ "Non-fractionated: acetate",
)) %>%
# Call the plot
ggplot(aes(
sample,
relab * 100,
fill = phylum)) +
geom_col(show.legend = TRUE) +
facet_wrap(~ fraction, scales = "free_x", nrow = 1) +
labs(x = "", y = 'Relative abundance (%)', fill = '') +
scale_fill_manual(values = cols.phylum, drop = FALSE) +
theme_tidy(legend = "bottom") + guides(fill = guide_legend(nrow = 3, reverse = TRUE)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.key.width = unit(3.5, "mm"),
legend.text = element_text(margin = margin(l = 2)),
legend.margin = margin(t = -10)
)t <- seqtab %>%
inner_join(smd, by = "sample") %>% filter(groundwater == "SA2600") %>%
filter(grepl("P26", sample)) %>%
inner_join(tax, by = "seqid") %>%
group_by(phylum, sample) %>%
# Sum the abundance of each phylum within a sample
summarise(relab = sum(relab), .groups = 'drop_last') %>%
# Calculate the mean abundance of each phylum over the categories
summarise(mean_relab = sum(relab), .groups = 'drop') %>%
top_n(8, mean_relab) %>% arrange(desc(mean_relab)) %>% pull(phylum)p3 <- seqtab %>%
filter(grepl("P26", sample)) %>%
inner_join(tax, by = "seqid") %>%
mutate(phylum = if_else(phylum %in% t, phylum, "Other")) %>%
# Summarize in order to have the sum for each category and topphylum
group_by(sample, phylum) %>%
summarise(relab = sum(relab), .groups = 'drop') %>%
inner_join(smd, by = "sample") %>% filter(groundwater == "SA2600") %>%
mutate(phylum = factor(phylum, levels = names(rev(cols.phylum)))) %>%
mutate(fraction = case_when(
fraction == "fractionated" & medium == "lys" ~ "Fractionated: lysate",
fraction == "fractionated" & medium == "ace" ~ "Fractionated: acetate",
fraction == "non-fractionated" & medium == "lys" ~ "Non-fractionated: lysate",
fraction == "non-fractionated" & medium == "ace" ~ "Non-fractionated: acetate",
)) %>%
# Call the plot
ggplot(aes(
sample,
relab * 100,
fill = phylum)) +
geom_col(show.legend = TRUE) +
facet_wrap(~ fraction, scales = "free_x", nrow = 1) +
labs(x = "", y = 'Relative abundance (%)', fill = '') +
scale_fill_manual(values = cols.phylum, drop = FALSE) +
theme_tidy(legend = "bottom") + guides(fill = guide_legend(nrow = 3, reverse = TRUE)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.key.width = unit(3.5, "mm"),
legend.text = element_text(margin = margin(l = 2)),
legend.margin = margin(t = -10)
)p1 + p2 + p3 + plot_layout(nrow = 3, ncol = 1, guides = "collect", widths = c(2,2,1)) +
plot_annotation(tag_levels = "a") & theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
plot.tag.location = "panel",
plot.tag.position = c(-0.05, 1.06),
legend.position = "bottom",
legend.box.margin = margin(t = 10),
aspect.ratio = 1
)ggsave(filename = "figures/fig-s3.pdf", width = 180, height = 180, units = "mm")t <- c("Bacillota","Desulfobacterota","Pseudomonadota","Patescibacteria","Nanoarchaeota","Other")
# Prepare data for plot
i <- quality %>%
inner_join(bintax, by = "genome") %>%
mutate(phylum = ifelse(phylum %in% t, phylum, "Other")) %>%
mutate(phylum = factor(phylum, levels = t)) %>%
select(genome, phylum, completeness, gc_content, genome_size, coding_density) %>%
distinct() %>%
mutate(genome.size = genome_size / 1e6)lm(gc_content ~ genome_size, data = quality) %>% summary()
Call:
lm(formula = gc_content ~ genome_size, data = quality)
Residuals:
Min 1Q Median 3Q Max
-0.15655 -0.06232 -0.02604 0.07979 0.13851
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.058e-01 3.670e-02 8.332 1.26e-09 ***
genome_size 5.080e-08 1.029e-08 4.935 2.23e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.09515 on 33 degrees of freedom
Multiple R-squared: 0.4246, Adjusted R-squared: 0.4072
F-statistic: 24.36 on 1 and 33 DF, p-value: 2.233e-05
p1 <- i %>%
ggplot(aes(
genome.size,
gc_content,
color = phylum,
fill = phylum
)) +
geom_smooth(method = "lm", formula = y ~ x, colour = "black", fill = "#d9d9d9", se = TRUE, linewidth = 0.4) +
geom_point(size = 2.5, shape = 21, stroke = 0.5, key_glyph = "rect") +
labs(x = "Estimated genome size (Mb)", y = "Estimated GC content (%)", color = "") +
scale_colour_manual(values = cols.mag, guide = "none") +
scale_fill_manual(values = alpha(cols.mag, alpha = 0.5)) +
theme_tidy(fontsize = 6) +
scale_x_continuous(breaks = c(1,2,3,4,5,6)) +
annotate("text", x = Inf, y = -Inf,
label = "paste(R ^ 2, \" = 0.41\")",
size = 6/.pt, vjust = -0.5, hjust = 1, parse = TRUE) +
theme(
axis.line = element_line(),
panel.border = element_blank()
)p2 <- i %>%
mutate(phylum = factor(phylum, levels = c(
"Nanoarchaeota", "Patescibacteria", "Other", "Bacillota", "Desulfobacterota", "Pseudomonadota"
))) %>%
ggplot(aes(
phylum,
completeness,
group = phylum,
fill = phylum,
color = phylum
)) +
geom_boxplot(linewidth = 0.5, outlier.colour = "white") +
geom_jitter(width = 0.2, size = 2, shape = 21, stroke = 0.5) +
labs(x = "", y = "Estimated completeness (%)", fill = "") +
scale_fill_manual(values = alpha(cols.mag, alpha = 0.5), guide = "none") +
scale_color_manual(values = cols.mag, guide = "none") +
theme_tidy(legend = "bottom", fontsize = 6) +
theme(
axis.line = element_line(),
panel.border = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(margin = margin()),
plot.margin = margin()
)lm(coding_density ~ genome_size, data = quality) %>% summary()
Call:
lm(formula = coding_density ~ genome_size, data = quality)
Residuals:
Min 1Q Median 3Q Max
-0.05556 -0.01303 -0.00046 0.01275 0.03787
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.172e-01 8.112e-03 113.064 < 2e-16 ***
genome_size -6.991e-09 2.275e-09 -3.072 0.00424 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.02103 on 33 degrees of freedom
Multiple R-squared: 0.2224, Adjusted R-squared: 0.1989
F-statistic: 9.439 on 1 and 33 DF, p-value: 0.004236
p3 <- i %>%
ggplot(aes(
genome.size,
coding_density,
color = phylum,
fill = phylum
)) +
geom_smooth(method = "lm", formula = y ~ x, colour = "black", fill = "#d9d9d9", se = TRUE, linewidth = 0.4) +
geom_point(size = 2.5, shape = 21, stroke = 0.5) +
labs(x = "Estimated genome size (Mb)", y = "Estimated coding density", color = "") +
scale_colour_manual(values = cols.mag, guide = "none") +
scale_fill_manual(values = alpha(cols.mag, alpha = 0.5), guide = "none") +
theme_tidy(fontsize = 6) +
scale_x_continuous(breaks = c(1,2,3,4,5,6)) +
scale_y_continuous(n.breaks = 7) +
annotate("text", x = 6, y = 0.93,
label = "paste(R ^ 2, \" = 0.20\")",
size = 6/.pt, vjust = 1, hjust = 1, parse = TRUE) +
theme(
axis.line = element_line(),
panel.border = element_blank()
)nmds <- coverm %>%
# Sum the abundance of both coverage per metagenome
mutate(abundance = 1) %>%
inner_join(emapper, by = "genome", relationship = "many-to-many") %>%
# Filter out NA and add multiple KOs on new line
separate_rows(ko, sep = ",") %>%
# Sum the abundance of each KO within the metagenomes
group_by(genome, ko) %>%
summarise(abundance = sum(abundance), .groups = "drop") %>%
# Pivot to prepare a numerical matrix
pivot_wider(names_from = "ko", values_from = "abundance", values_fill = 0) %>%
column_to_rownames("genome") %>%
vegan::metaMDS()Square root transformation
Wisconsin double standardization
Run 0 stress 0.06432893
Run 1 stress 0.06458387
... Procrustes: rmse 0.06357583 max resid 0.1237997
Run 2 stress 0.06407586
... New best solution
... Procrustes: rmse 0.06501158 max resid 0.1229768
Run 3 stress 0.06649645
Run 4 stress 0.06967825
Run 5 stress 0.06485684
Run 6 stress 0.06627825
Run 7 stress 0.06999917
Run 8 stress 0.06432922
... Procrustes: rmse 0.06485113 max resid 0.1250963
Run 9 stress 0.06574132
Run 10 stress 0.06486508
Run 11 stress 0.06672071
Run 12 stress 0.06681644
Run 13 stress 0.0691574
Run 14 stress 0.06509552
Run 15 stress 0.06552364
Run 16 stress 0.06564337
Run 17 stress 0.06432906
... Procrustes: rmse 0.06487116 max resid 0.1250532
Run 18 stress 0.06568593
Run 19 stress 0.0641662
... Procrustes: rmse 0.002380285 max resid 0.01093463
Run 20 stress 0.065016
*** Best solution was not repeated -- monoMDS stopping criteria:
2: no. of iterations >= maxit
18: stress ratio > sratmax
nmds.scores <- vegan::scores(nmds)$sites %>%
as.data.frame() %>%
rownames_to_column("genome")p4 <- nmds.scores %>%
inner_join(bintax, by = "genome") %>%
mutate(phylum = ifelse(phylum %in% t, phylum, "Other")) %>%
# Plot
ggplot(aes(x = NMDS1 * -1,
y = NMDS2,
colour = fct_relevel(phylum,t),
fill = phylum
)) +
geom_vline(xintercept = 0, linetype = 'dotted', linewidth = 0.5) +
geom_hline(yintercept = 0, linetype = 'dotted', linewidth = 0.5) +
# Points for samples, coloured by nature
geom_point(size = 3, shape = 21, stroke = 0.5) +
theme_tidy(fontsize = 6) + labs(x = "NMDS1", y = "NMDS2") +
scale_color_manual(values = cols.mag, name = "Phylum", guide = "none") +
scale_fill_manual(values = alpha(cols.mag, alpha = 0.5), name = "", guide = "none") +
scale_y_continuous(breaks = c(-0.5, 0, 0.5)) +
annotate('text', x = -Inf, y = -Inf, size = 6/.pt,
label = paste('Stress = ', round(nmds$stress, digits = 2)),
hjust = -0.1, vjust = -0.5) +
theme(
panel.border = element_rect(fill = "transparent", linewidth = 0.75),
axis.title.y = element_text(margin = margin()),
legend.spacing.x = unit(-0.5, "mm")
)p1 + p2 + p3 + p4 + plot_layout(guides = "collect", nrow = 2) & plot_annotation(tag_levels = c("a", "b", "c", "d")) &
theme(
aspect.ratio = 1.0,
plot.margin = margin(),
plot.tag.location = "panel",
plot.tag.position = c(0.04, 0.95),
legend.position = "bottom",
axis.ticks.length = unit(".75", "mm"),
legend.margin = margin(t = -4),
legend.text = element_text(hjust = 0),
legend.key.size = unit("3", "mm"),
legend.title = element_blank()
)ggsave(filename = "figures/fig-4.pdf", width = 12, height = 12, units = "cm")km <- data.frame()
for (file in list.files("data/kmapper/", full.names = TRUE)) {
genome <- substr(file, 15, nchar(file)) %>% gsub(".txt", "", .)
i <- read_tsv(file, col_names = FALSE, show_col_types = FALSE)
i <- i %>% filter(grepl("M00", X1)) %>%
mutate(genome = genome) %>%
mutate(module = substr(X1, 1, 6)) %>%
mutate(completeness = sub(".*\\s", "", X1)) %>%
mutate(completeness = gsub(")", "", completeness)) %>%
select(-X1)
km <- km %>% rbind(i)
}
km <- km %>%
mutate(genes = gsub("/.*", "", completeness) %>% as.numeric()) %>%
mutate(total = gsub(".*/", "", completeness) %>% as.numeric()) %>%
mutate(score = case_when(
genes == total ~ "c",
(total - genes) == 1 ~ "i",
.default = "m"
)) %>%
select(genome, module, score) %>%
# Assign a value for every genome-module combination
pivot_wider(names_from = "module", values_from = "score", values_fill = "m") %>%
pivot_longer(-1, values_to = "score", names_to = "module")km <- km %>% mutate(label = case_when(
module == "M00001" ~ "M00001: Glycolysis",
module == "M00002" ~ "M00002: Glycolysis core module",
#module == "M00003" ~ "M00003: Gluconeogenesis",
module == "M00307" ~ "M00307: Pyruvate oxidation",
module == "M00008" ~ "M00008: Enter-Doudoroff pathway",
module == "M00009" ~ "M00009: TCA cycle",
module == "M00004" ~ "M00004: Pentose phosphate pathway (PPP)",
module == "M00007" ~ "M00007: PPP, non-oxidative",
module == "M00082" ~ "M00082: Fatty acids biosynthesis",
module == "M00549" ~ "M00549: UDP-Glc biosynthesis",
module == "M00909" ~ "M00909: UDP-GlcNAc biosynthesis",
#module == "M00020" ~ "M00020: AA biosynthesis: serine",
module == "M00016" ~ "M00016: AA biosynthesis: lysine",
module == "M00021" ~ "M00021: AA biosynthesis: cysteine",
module == "M00019" ~ "M00019: AA biosynthesis: valine/isoleucine",
#module == "M00017" ~ "M00017: AA biosynthesis: methionine",
#module == "M00048" ~ "M00048: De novo purine biosynthesis",
module == "M00053" ~ "M00053: Deoxyribonucleotide biosynthesis",
#module == "M00051" ~ "M00051: De novo pyrimidine biosynthesis",
module == "M00052" ~ "M00052: Pyrimidine ribonucleotide biosynthesis",
.default = NA
))
cols.phylum <- c(
"Desulfobacterota" = "#A2A475",
"Pseudomonadota" = "#972D15",
"Bacillota" = "#1B5331",
"Patescibacteria" = "#C7B19C",
"Nanoarchaeota" = "#00A08A",
"Chloroflexota" = "#D69C4E",
"Spirochaetota" = "#D8B70A",
"Campylobacterota" = "#FAEFD1",
"Iainarchaeota" = "#046C9A"
)
name <- bintax %>%
filter(genome %in% km$genome) %>%
mutate(phylum = factor(phylum, levels = names(cols.phylum))) %>%
arrange(phylum) %>%
mutate(genus = case_match(genus, NA ~ family, .default = genus)) %>%
mutate(taxon = if_else(is.na(species), paste(genus, "sp."), species)) %>%
pull(taxon) %>% unique()p1 <- km %>%
inner_join(bintax, by = "genome") %>%
mutate(genus = case_match(genus, NA ~ family, .default = genus)) %>%
mutate(taxon = if_else(is.na(species), paste(genus, "sp."), species)) %>%
mutate(taxon = factor(taxon, levels = name)) %>%
filter(!is.na(label)) %>%
ggplot(aes(
x = taxon,
y = fct_rev(label),
fill = score
)) +
geom_tile(color = "white") +
scale_x_discrete(position = "top") +
scale_fill_manual(values = c("c" = "#707151",
"i" = "#B6B79B",
"m" = "#F0F1EB"),
label = c("c" = "Complete",
"i" = "One block missing",
"m" = "Incomplete or missing"),
name = "Module completeness:") +
theme_tidy(fontsize = 6) + coord_fixed() +
geom_vline(xintercept = 7.5, linewidth = 0.6, color = "white") +
geom_vline(xintercept = 13.5, linewidth = 0.6, color = "white") +
geom_vline(xintercept = 19.5, linewidth = 0.6, color = "white") +
geom_vline(xintercept = 22.5, linewidth = 0.6, color = "white") +
geom_vline(xintercept = 24.5, linewidth = 0.6, color = "white") +
annotate("segment", x = 0.5, xend = 7.45, y = 16, yend = 16, color = "#A2A475", linewidth = 2) +
annotate("segment", x = 7.55, xend = 13.45, y = 16, yend = 16, color = "#972D15", linewidth = 2) +
annotate("segment", x = 13.55, xend = 19.45, y = 16, yend = 16, color = "#1B5331", linewidth = 2) +
annotate("segment", x = 19.55, xend = 22.45, y = 16, yend = 16, color = "#C7B19C", linewidth = 2) +
annotate("segment", x = 22.55, xend = 24.45, y = 16, yend = 16, color = "#00A08A", linewidth = 2) +
annotate("segment", x = 24.55, xend = 25.45, y = 16, yend = 16, color = "#D69C4E", linewidth = 2) +
annotate("segment", x = 25.55, xend = 26.45, y = 16, yend = 16, color = "#D8B70A", linewidth = 2) +
annotate("segment", x = 26.55, xend = 27.45, y = 16, yend = 16, color = "#FAEFD1", linewidth = 2) +
annotate("segment", x = 27.55, xend = 28.45, y = 16, yend = 16, color = "#046C9A", linewidth = 2) +
theme(
legend.position = "bottom",
legend.key.height = unit(3, "mm"),
legend.text = element_text(margin = margin(l = 2)),
legend.margin = margin(t = -10),
axis.text.x.top = element_text(angle = 90, hjust = 0, vjust = 0.6),
axis.ticks = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.border = element_blank()
)# Two MAGs have no metabolic weight (i.e. are either low abundant or have low metabolic potential)
i <- list(
genome = c("MEGAHIT-MaxBin2Refined-VK-3516-KFM02a-3_S40_L002.026", "MEGAHIT-MetaBAT2Refined-VK-3516-KFM02a-3_S40_L002.49_sub"),
pathway = c("Fermentation", "Fermentation"),
mw = c(0, 0))
mw <- read_tsv("data/metabolic.tsv.gz", show_col_types = FALSE) %>%
select(-sample, -mw.function) %>%
rbind(i) %>%
group_by(genome, pathway) %>% summarise(mw = sum(mw), .groups = "drop") %>%
pivot_wider(names_from = "pathway", values_from = "mw", values_fill = 0) %>%
pivot_longer(-1, names_to = "pathway", values_to = "mw") %>%
# Prepare the labels for the plot
mutate(label = pathway) %>%
mutate(label = gsub(".arbon", "C", label)) %>%
mutate(label = gsub(" - ", ": ", label)) %>%
mutate(label = gsub("amino acid", "AA", label)) %>%
mutate(label = gsub(" pathway", "", label)) %>%
mutate(label = gsub("\\|\\|vnfDKG\\|\\|", ", ", label)) %>%
mutate(label = gsub(" degradation", "", label)) %>%
mutate(label = gsub("methanol oxidation", "methanol", label)) %>%
mutate(label = gsub("Iron oxidation:", "Iron oxidation", label)) %>%
mutate(label = gsub("N2", "N", label))d <- c(
"Organic C oxidation: complex C",
"Organic C oxidation: AA utilization",
"Organic C oxidation: aromatics",
#"Organic C oxidation: methanol",
"Organic C oxidation: fatty acid",
"C fixation: CBB cycle (Rubisco)",
"C fixation: Reverse TCA cycle",
"C fixation: 3HP/4HB",
"C fixation: Wood-Ljungdahl",
"Acetate oxidation",
"Fermentation",
#"Hydrogen generation",
#"Hydrogen oxidation",
"N fixation: nifDK, nifH",
"Sulfur oxidation: sdo",
"Sulfate reduction",
"Sulfide oxidation: sqr",
"Iron oxidation",
"Iron reduction"
) %>% rev()
cols.phylum <- c(
"Desulfobacterota" = "#A2A475",
"Pseudomonadota" = "#972D15",
"Bacillota" = "#1B5331",
"Patescibacteria" = "#C7B19C",
"Nanoarchaeota" = "#00A08A",
"Chloroflexota" = "#D69C4E",
"Spirochaetota" = "#D8B70A",
"Campylobacterota" = "#FAEFD1",
"Iainarchaeota" = "#046C9A")name <- bintax %>%
filter(phylum %in% names(cols.phylum)) %>%
mutate(phylum = factor(phylum, levels = names(cols.phylum))) %>%
arrange(phylum) %>%
mutate(genus = case_match(genus, NA ~ family, .default = genus)) %>%
mutate(taxon = if_else(is.na(species), paste(genus, "sp."), species)) %>%
pull(taxon) %>% unique()p2 <- mw %>%
inner_join(bintax, by = "genome") %>%
mutate(genus = case_match(genus, NA ~ family, .default = genus)) %>%
mutate(taxon = if_else(is.na(species), paste(genus, "sp."), species)) %>%
# Order the genomes
filter(taxon %in% name) %>%
mutate(taxon = factor(taxon, name)) %>%
# Order the pathways
filter(label %in% d) %>%
mutate(label = factor(label, d)) %>%
mutate(score = if_else(mw > 0, "c", "m")) %>%
# Plot
ggplot(aes(
x = taxon,
y = label,
fill = score
)) +
geom_tile(color = "white") +
scale_fill_manual(values = c("c" = "#707151",
"m" = "#F0F1EB"),
label = c("c" = "Present",
"m" = "Absent")) +
theme_tidy(fontsize = 6) + coord_fixed() +
geom_vline(xintercept = 7.5, linewidth = 0.6, color = "white") +
geom_vline(xintercept = 13.5, linewidth = 0.6, color = "white") +
geom_vline(xintercept = 19.5, linewidth = 0.6, color = "white") +
geom_vline(xintercept = 22.5, linewidth = 0.6, color = "white") +
geom_vline(xintercept = 24.5, linewidth = 0.6, color = "white") +
annotate("segment", x = 0.5, xend = 7.45, y = 17, yend = 17, color = "#A2A475", linewidth = 2) +
annotate("segment", x = 7.55, xend = 13.45, y = 17, yend = 17, color = "#972D15", linewidth = 2) +
annotate("segment", x = 13.55, xend = 19.45, y = 17, yend = 17, color = "#1B5331", linewidth = 2) +
annotate("segment", x = 19.55, xend = 22.45, y = 17, yend = 17, color = "#C7B19C", linewidth = 2) +
annotate("segment", x = 22.55, xend = 24.45, y = 17, yend = 17, color = "#00A08A", linewidth = 2) +
annotate("segment", x = 24.55, xend = 25.45, y = 17, yend = 17, color = "#D69C4E", linewidth = 2) +
annotate("segment", x = 25.55, xend = 26.45, y = 17, yend = 17, color = "#D8B70A", linewidth = 2) +
annotate("segment", x = 26.55, xend = 27.45, y = 17, yend = 17, color = "#FAEFD1", linewidth = 2) +
annotate("segment", x = 27.55, xend = 28.45, y = 17, yend = 17, color = "#046C9A", linewidth = 2) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "bottom",
legend.title = element_blank(),
legend.spacing.x = unit(0, "mm"),
legend.box.margin = margin(),
panel.border = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank(),
legend.margin = margin(t = -10),
legend.key.height = unit(3, "mm")
)p1 / p2 + plot_annotation(tag_levels = 'a') & theme(
legend.box.margin = margin(l = -8),
plot.margin = margin(b = 6),
legend.key.width = unit(2, "mm"),
legend.text = element_text(margin = margin(l = 2)),
plot.tag.location = 'panel',
plot.tag.position = c(-0.05, 1.03)
)ggsave("figures/fig-5.pdf", width = 180, height = 180, units = "mm")